Load required libraries, installing them beforehand if necessary, for use in the subsequent steps:

# Install any required package that is not yet available.
# requireNamespace() is used for the check because installed.packages() is
# slow and its documentation advises against using it to test whether a
# single named package is installed.
local({
    required_packages <- c(
        "tidyr", "dplyr", "ggplot2", "stringr", "lubridate", "ggdark",
        "ggExtra", "schoolmath", "plotly", "tufte", "ggthemes", "data.table",
        "gapminder", "ggalt"
    )
    is_available <- vapply(
        required_packages, requireNamespace, logical(1), quietly = TRUE
    )
    if (!all(is_available)) {
        install.packages(required_packages[!is_available])
    }
})

# Attach the packages. The order is kept unchanged on purpose: a package
# attached later masks same-named functions of the ones attached before it.
library(tidyr)
library(dplyr)
library(ggplot2)
library(stringr)
library(lubridate)
library(ggdark)
library(ggExtra)
library(schoolmath)
library(plotly)
library(tufte)
library(ggthemes)
library(data.table)
library(gapminder)
library(ggalt)


General theme and colour settings

Colour definition [ABB: why not also define here all the colours used in the ggplots (e.g. “#f03b20”, “#cccccc”, …)?]

# Shared palette used by the theme and the plots.
fill_color <- "#ffffff"        # white
decoration_color <- "#b6b5b5"  # medium gray
text_color <- "#000000"        # black
main1_color <- "#FF0000"       # primary red
main2_color <- "#ff8d00"       # orange yellow
brewercolors <- "YlOrRd"       # name of a ColorBrewer palette

Theme definition

# Light theme for all plots: theme_tufte() with every grid line, border,
# background and tick removed, small text, a thin gray axis line and an
# untitled legend placed inside the panel.
bigmac_theme_light <- theme_tufte() +
    theme(
        # Titles and text
        plot.title = element_text(color = text_color, size = 10, hjust = 0.2),
        axis.title.x = element_text(color = text_color, size = 8, hjust = 0.5),
        axis.title.y = element_text(color = text_color, size = 8, hjust = 0.5, angle = 0), #[ABB: Check with team]
        axis.text = element_text(size = 6, colour = text_color),
        strip.text = element_text(color = decoration_color, size = 10),

        # Axes
        axis.line = element_line(size = 0.3, colour = decoration_color),
        axis.ticks = element_blank(),

        # Panel, strip and plot decoration (all removed)
        panel.grid = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        strip.background = element_blank(),
        plot.background = element_blank(),

        # Legend
        legend.position = c(0.8, 0.9),
        legend.title = element_blank(),
        legend.key = element_blank(),
        legend.text = element_text(color = text_color, size = 6, hjust = 0.5)
    )

# Make the theme the default for every plot created afterwards.
theme_set(bigmac_theme_light)


Data retrieval and preprocessing

Prepare the libraries needed for appropriate table rendering:

# Install the table-rendering packages if they are not yet available.
# requireNamespace() is used for the check because installed.packages() is
# slow and its documentation advises against using it to test whether a
# single named package is installed.
local({
    table_packages <- c("knitr", "kableExtra")
    is_available <- vapply(
        table_packages, requireNamespace, logical(1), quietly = TRUE
    )
    if (!all(is_available)) {
        install.packages(table_packages[!is_available])
    }
})

library(knitr)
library(kableExtra)


Read the .csv file into a data.frame, select columns of interest and inspect the initial data structure:

# Load the raw export; the path is resolved against the working directory.
bigmac <- read.csv("Big Mac Index.csv")
# Keep only the columns of interest.
bigmac <- bigmac[, c("date", "name", "dollar_price", "USD_adjusted", "iso_a3", "adj_price")]
# Preview the first rows as a styled table (FALSE spelled out: `F` can be reassigned).
kable_styling(kable(head(bigmac)), full_width = FALSE)
date name dollar_price USD_adjusted iso_a3 adj_price
2000-04-01 Argentina 2.500000 NA ARG NA
2000-04-01 Australia 1.541667 NA AUS NA
2000-04-01 Brazil 1.648045 NA BRA NA
2000-04-01 Canada 1.938775 NA CAN NA
2000-04-01 Switzerland 3.470588 NA CHE NA
2000-04-01 Chile 2.451362 NA CHL NA
# Number of rows and columns of the selected raw data.
dim(bigmac)
## [1] 1162    6
# Column types of the selected raw data.
str(bigmac)
## 'data.frame':    1162 obs. of  6 variables:
##  $ date        : Factor w/ 29 levels "2000-04-01","2001-04-01",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ name        : Factor w/ 57 levels "Argentina","Australia",..: 1 2 5 7 48 8 9 13 14 16 ...
##  $ dollar_price: num  2.5 1.54 1.65 1.94 3.47 ...
##  $ USD_adjusted: num  NA NA NA NA NA NA NA NA NA NA ...
##  $ iso_a3      : Factor w/ 56 levels "ARE","ARG","AUS",..: 2 3 6 7 8 9 10 13 14 16 ...
##  $ adj_price   : num  NA NA NA NA NA NA NA NA NA NA ...

Coerce column date to Date (the structure printed above shows that it was read in as a factor):

# Replace the date column with its Date representation.
bigmac[["date"]] <- as.Date(bigmac[["date"]])

Add column continent via the library gapminder:

bigmac <- data.table(bigmac)
# One row per country with its continent. Columns are selected by name rather
# than by position so the lookup does not depend on gapminder's column order.
continents <- unique(gapminder[, c("country", "continent")])
# Inner join on the country name: rows of bigmac whose name has no exact match
# in gapminder's country column are dropped here.
bigmac <- merge(bigmac, continents, by.x = "name", by.y = "country")

Remove NAs and check:

# Keep only the rows that have a value in every column.
bigmac <- bigmac[complete.cases(bigmac), ]
# Count the NA cells that remain; the rendered output below shows 0.
sum(is.na(bigmac))
## [1] 0

Inspect processed dataset:

# Number of rows and columns after the merge and the NA removal.
dim(bigmac)
## [1] 479   7
# Column types of the processed data.
str(bigmac)
## Classes 'data.table' and 'data.frame':   479 obs. of  7 variables:
##  $ name        : Factor w/ 57 levels "Argentina","Australia",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ date        : Date, format: "2011-07-01" "2012-01-01" ...
##  $ dollar_price: num  4.84 4.64 4.16 3.82 3.88 ...
##  $ USD_adjusted: num  1.011 0.756 0.489 0.316 0.286 ...
##  $ iso_a3      : Factor w/ 56 levels "ARE","ARG","AUS",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ adj_price   : num  3.15 2.99 3.01 3.26 3.12 ...
##  $ continent   : Factor w/ 5 levels "Africa","Americas",..: 2 2 2 2 2 2 2 2 2 2 ...
##  - attr(*, ".internal.selfref")=<externalptr> 
##  - attr(*, "sorted")= chr "name"


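Summarise bigmac dataset by:

- year and country
- continent and date

and filter by:

- country (United States)
- continent (Europe)
- a single year

storing the result of every case in a separate data.frame: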

# Average dollar price per calendar year and country.
# The grouping column keeps its generated name `year(date)`.
# ungroup() drops the grouping that summarize() would otherwise leave on the
# result, so later operations are not applied per group by accident.
bigmac_year <- bigmac %>%
    group_by(year(date), name) %>%
    summarize(AvgDollarPrice = mean(dollar_price)) %>%
    ungroup()
kable_styling(kable(head(bigmac_year)), full_width = FALSE)
year(date) name AvgDollarPrice
2011 Argentina 4.839685
2011 Australia 4.943724
2011 Brazil 6.162429
2011 Canada 5.000793
2011 Chile 3.997839
2011 China 2.273080
# Average dollar price per continent and survey date.
# ungroup() drops the grouping that summarize() would otherwise leave on the
# result, so later operations are not applied per group by accident.
bigmac_continent <- bigmac %>%
    group_by(continent, date) %>%
    summarize(AvgDollarPrice = mean(dollar_price)) %>%
    ungroup()
kable_styling(kable(head(bigmac_continent)), full_width = FALSE)
continent date AvgDollarPrice
Africa 2011-07-01 2.618379
Africa 2012-01-01 2.510627
Africa 2012-07-01 2.496640
Africa 2013-01-01 2.351654
Africa 2013-07-01 2.314576
Africa 2014-01-01 2.296257
# Dollar price of the United States per survey date; used as the reference
# series in the plots.
bigmac_usa <- bigmac %>%
    filter(name == "United States") %>%
    group_by(date) %>%
    summarize(AvgDollarPrice = mean(dollar_price))
kable_styling(kable(head(bigmac_usa)), full_width = FALSE)
date AvgDollarPrice
2011-07-01 4.065000
2012-01-01 4.197220
2012-07-01 4.327500
2013-01-01 4.367396
2013-07-01 4.556667
2014-01-01 4.624167
# All rows belonging to European countries, with every column kept.
bigmac_europe <- bigmac %>%
    filter(continent == "Europe")
kable_styling(kable(head(bigmac_europe)), full_width = FALSE)
name date dollar_price USD_adjusted iso_a3 adj_price continent
Czech Republic 2011-07-01 4.072401 0.452 CZE 3.670336 Europe
Czech Republic 2012-01-01 3.448013 0.144 CZE 3.417339 Europe
Czech Republic 2012-07-01 3.341410 0.057 CZE 3.400976 Europe
Czech Republic 2013-01-01 3.722159 0.137 CZE 3.673327 Europe
Czech Republic 2013-07-01 3.490596 0.024 CZE 3.526358 Europe
Czech Republic 2014-01-01 3.473215 0.027 CZE 3.584248 Europe
# Per-country averages for 2014: mean dollar price and mean USD_adjusted,
# plus a factor `Up` flagging whether the mean USD_adjusted is positive.
# The filter previously selected year "2012" (as a string), which did not
# match the object name or the "2014" title of the plot built from it.
bigmac_2014 <- bigmac %>%
    filter(year(date) == 2014) %>%
    group_by(name) %>%
    summarize(
        AvgDollarPrice = mean(dollar_price),
        AvgDollarAdjusted = mean(USD_adjusted)
    ) %>%
    mutate(Up = as.factor(is.positive(AvgDollarAdjusted)))
kable_styling(kable(head(bigmac_2014)), full_width = FALSE)
name AvgDollarPrice AvgDollarAdjusted Up
Argentina 4.398785 0.6225 TRUE
Australia 4.808358 0.0460 TRUE
Brazil 5.307322 0.9105 TRUE
Canada 4.827628 0.1435 TRUE
Chile 4.104388 0.4525 TRUE
China 2.444080 -0.0315 FALSE


Additional preprocessing [ABB: Check where this fits into the overall workflow] [ABB: Check the compatibility of the code snippet below with the rest of the code (adj_price is not used anywhere else? NAs?)]

# Re-read the raw export so this section works on the unfiltered data.
bigmac4 <- read.csv("Big Mac Index.csv")

# Long tables: one with the dollar price, one with the adjusted price.
bigmac5 <- bigmac4[, c("name", "date", "dollar_price")]
bigmac6 <- bigmac4[, c("name", "date", "adj_price")]

# Reshape to wide form: one row per country, one column per survey date.
# The input is converted to a data.table first so that data.table's own dcast
# method is used; calling dcast on a data.frame relies on a deprecated
# redirect to reshape2 that is announced to become an error. The value column
# is named explicitly instead of being guessed, and the result is converted
# back to a plain data.frame as before.
bigmac7 <- as.data.frame(
    dcast(as.data.table(bigmac5), name ~ date, value.var = "dollar_price")
)
bigmac8 <- as.data.frame(
    dcast(as.data.table(bigmac6), name ~ date, value.var = "adj_price")
)

# Keep the country name and column 30 of each wide table.
# NOTE(review): column 30 is presumably the most recent (2018) survey date,
# given the names assigned below - confirm, and consider selecting it by name.
bigmac_USprice <- bigmac7[, c(1, 30)]
bigmac_adjusted <- bigmac8[, c(1, 30)]

names(bigmac_USprice)[2] <- "USPrice_2018"
names(bigmac_adjusted)[2] <- "adjusted_2018"

# Join both prices per country; the key is stated explicitly.
Merg_data <- merge(bigmac_USprice, bigmac_adjusted, by = "name")
# Drops every country that lacks either of the two prices.
Merg_data <- Merg_data[complete.cases(Merg_data), ] # ABB: [Check if this line is ok]
Merg_data <- arrange(Merg_data, desc(USPrice_2018))

# Fix the factor level order to the sorted row order so that plots list the
# countries by descending dollar price.
levels <- Merg_data$name
Merg_data$name <- factor(Merg_data$name, levels = levels)

Exploratory visualisation

  1. The Big Mac index for all countries, with the US highlighted
# Every country's dollar price as a faint gray line, with the United States
# series drawn on top in red.
ggplot() +
    geom_line(
        mapping = aes(x = date, y = dollar_price, group = name),
        data = bigmac,
        colour = "#cccccc",
        alpha = 0.7,
        lwd = 0.25
    ) +
    geom_line(
        mapping = aes(x = date, y = AvgDollarPrice),
        data = bigmac_usa,
        colour = "#f03b20",
        lwd = 0.35,
        show.legend = FALSE
    ) +
    labs(x = "", y = "$", title = "The BigMac Index")


  1. highlight continent trends
# Country lines in gray, continent averages in colour, and the United States
# series as a dashed red line on top.
ggplot() +
    geom_line(
        mapping = aes(x = date, y = dollar_price, group = name),
        data = bigmac,
        colour = "#cccccc",
        alpha = 0.7,
        lwd = 0.25
    ) +
    geom_line(
        mapping = aes(x = date, y = AvgDollarPrice, colour = continent),
        data = bigmac_continent,
        lwd = 0.5,
        show.legend = TRUE
    ) +
    geom_line(
        mapping = aes(x = date, y = AvgDollarPrice),
        data = bigmac_usa,
        colour = "#f03b20",
        linetype = "dashed",
        lwd = 0.9,
        show.legend = FALSE
    ) +
    # NOTE(review): "#E67E22" appears twice, so two continents share one
    # colour - confirm whether that is intended.
    scale_color_manual(
        values = c("#FDD835", "#F39C12", "#E67E22", "#F5B041", "#E67E22")
    ) +
    labs(x = "", y = "$", title = "The BigMac Index")


  1. Facet by continent to reveal continents above and below the US price
# One panel per continent with its countries in gray. bigmac_usa is built
# without a continent column, so its red line is repeated in every panel.
ggplot() +
    geom_line(
        mapping = aes(x = date, y = dollar_price, group = iso_a3),
        data = transform(bigmac, name = NULL),
        colour = decoration_color,
        alpha = 0.7,
        lwd = 0.2
    ) +
    geom_line(
        mapping = aes(x = date, y = AvgDollarPrice),
        data = bigmac_usa,
        colour = "#f03b20",
        lwd = 0.3,
        show.legend = FALSE
    ) +
    facet_wrap(~ continent) +
    theme(strip.placement = "outside", strip.background = element_blank()) +
    labs(x = "", y = "$")


  1. tufte boxplot to inspect the average distribution of each continent. We can see Europe’s higher price
# Tufte-style boxplot of the per-date continent averages, one box per continent.
ggplot() +
    geom_tufteboxplot(
        mapping = aes(x = continent, y = AvgDollarPrice),
        data = bigmac_continent,
        colour = "#ff8d00"
    ) +
    labs(x = "", y = "$")


  1. All European countries, USA in red
# European countries in gray with the United States series in red on top.
ggplot() +
    geom_line(
        mapping = aes(x = date, y = dollar_price, group = name),
        data = bigmac_europe,
        colour = decoration_color,
        alpha = 0.7,
        lwd = 0.2
    ) +
    geom_line(
        mapping = aes(x = date, y = AvgDollarPrice),
        data = bigmac_usa,
        colour = "#f03b20",
        lwd = 0.3,
        show.legend = FALSE
    ) +
    theme(strip.placement = "outside", strip.background = element_blank()) +
    labs(x = "", y = "$", title = "Europe")


  1. Facet Europe to see each country’s trend
# One panel per European country (gray). bigmac_usa is built without a name
# column, so its red line is repeated in every panel.
ggplot() +
    geom_line(
        mapping = aes(x = date, y = dollar_price, group = name),
        data = bigmac_europe,
        colour = decoration_color,
        alpha = 0.8,
        lwd = 0.2
    ) +
    geom_line(
        mapping = aes(x = date, y = AvgDollarPrice),
        data = bigmac_usa,
        colour = "#f03b20",
        lwd = 0.3,
        show.legend = FALSE
    ) +
    facet_wrap(~ name) +
    theme(strip.placement = "outside", strip.background = element_blank()) +
    labs(x = "", y = "$", title = "Price of a BigMac")


  1. Interactive Distribution 2014
# One point per country showing its mean USD_adjusted, coloured by the `Up`
# flag, with a reference line at zero. The `text` aesthetic is not drawn by
# ggplot2; it only feeds the plotly tooltip.
p <- ggplot(
    data = bigmac_2014,
    mapping = aes(
        x = name,
        y = AvgDollarAdjusted,
        colour = Up,
        text = paste(name, "\n", AvgDollarAdjusted)
    )
) +
    geom_point(size = 2, alpha = 0.75, show.legend = FALSE) +
    geom_hline(yintercept = 0, colour = decoration_color, alpha = 0.5, lwd = 0.3) +
    scale_color_manual(values = c("#ff8d00", "#FF0000")) +
    labs(x = "", y = "$", title = "BigMac Price 2014") +
    theme(axis.text.x = element_blank(), axis.title.x = element_blank())

# Convert to an interactive plot whose tooltip shows only the `text` aesthetic.
ggplotly(p, tooltip = "text") %>%
    layout(showlegend = FALSE)


Additionally: dumbbell plot

# Dumbbell plot per country: one end at the dollar price, the other (red) end
# at the adjusted price. The dashed vertical line marks the mean dollar price.
ggplot(Merg_data, aes(x = USPrice_2018, xend = adjusted_2018, y = name)) +
    labs(x = "Price", y = "") +
    geom_vline(
        xintercept = mean(Merg_data$USPrice_2018, na.rm = TRUE),
        color = decoration_color,
        linetype = "dashed"
    ) +
    geom_dumbbell(color = main2_color, colour_xend = "red", size_xend = 2) +
    # Hand-made legend entry for the red end. annotate() draws it exactly once;
    # a geom with data = Merg_data and constant aesthetics would draw one
    # overlapping copy per row of Merg_data.
    annotate(
        "text",
        x = 5.5, y = "Egypt", label = "GDP Adjusted Price ($)",
        color = "red", hjust = 0, size = 3
    ) +
    annotate("point", x = 5.4, y = "Egypt", color = "red", size = 2)